Topological Cluster Statistics (TCS)¶


Notebook 3: sensitivity improvements¶


This notebook contains scripts that evaluate sensitivity of TCS compared to cluster-based statistic.


Packages and basic functions¶


Loading required packages

In [1]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm

Basic functions

In [2]:
def ensure_dir(file_name):
    os.makedirs(os.path.dirname(file_name), exist_ok=True)
    return file_name


def write_np(np_obj, file_path):
    with open(file_path, 'wb') as outfile:
        np.save(outfile, np_obj)


def load_np(file_path):
    with open(file_path, 'rb') as infile:
        return np.load(infile)

Plot settings (latex is used for better plotting)

In [3]:
sns.set()
sns.set_style("darkgrid")

%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [4]:
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{mathtools} \usepackage{sfmath}')

plt.rc('xtick', labelsize=20)
plt.rc('ytick', labelsize=20)
plt.rc('axes', labelsize=24)

plt.rc('figure', dpi=500)

Loading the ground truth¶


The ground truth stored in notebook 2 is loaded here.

In [5]:
# list of all tasks and the cope number related to each selected contrast
tasks = {
    'EMOTION': '3',  # faces - shapes
    'GAMBLING': '6',  # reward - punish
    'RELATIONAL': '4',  # rel - match
    'SOCIAL': '6',  # tom - random
    'WM': '20',  # face - avg
}
In [6]:
# Compute mean and std, followed by a parametric z-score (one sample t-test)
ground_truth_effect = {}
# Base directory where files are stored at
base_dir='/data/netapp01/work/sina/structural_clustering/PALM_revision_1'

for task in tqdm(tasks, desc="Tasks loop", leave=True):
    ground_truth_effect[task] = load_np(
        '{}/ground_truth/cohen_d_{}_cope{}.dscalar.npy'.format(base_dir, task, tasks[task]),
    )
Tasks loop:   0%|          | 0/5 [00:00<?, ?it/s]

Loading PALM results¶


PALM results stored in notebook 1 is loaded here.

In [7]:
%%time

# Number of random repetitions
repetitions = 500
# Different sample sizes tested
# sample_sizes = [10, 20, 40, 80, 160, 320]
sample_sizes = [40]
# Different cluster defining thresholds
cdts = [3.3, 2.8, 2.6, 2.0, 1.6]
# Number of brainordinates in a cifti file
Nv = 91282
# Base directory where files are stored at
base_dir='/data/netapp01/work/sina/structural_clustering/PALM_revision_1'

# Store loaded results in nested python dictionaries
loaded_maps = {}
loaded_maps['uncorrected_tstat'] = {}
loaded_maps['spatial_cluster_corrected_tstat'] = {}
loaded_maps['topological_cluster_corrected_tstat'] = {}

# Only use the z=3.3, p=0.001 for the main analyses reported here
cdt = 3.3
for task in tqdm(tasks, desc="Tasks loop", leave=True):
    loaded_maps['uncorrected_tstat'][task] = {}
    loaded_maps['spatial_cluster_corrected_tstat'][task] = {}
    loaded_maps['topological_cluster_corrected_tstat'][task] = {}
    for sample_size in tqdm(sample_sizes, desc="Sample size loop", leave=False):
        loaded_maps['uncorrected_tstat'][task][f'N={sample_size}'] = load_np(
            f'{base_dir}/summary/uncorrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy',
        )
        loaded_maps['spatial_cluster_corrected_tstat'][task][f'N={sample_size}'] = load_np(
            ensure_dir(f'{base_dir}/summary/spatial_cluster_corrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy'),
        )
        loaded_maps['topological_cluster_corrected_tstat'][task][f'N={sample_size}'] = load_np(
            ensure_dir(f'{base_dir}/summary/topological_cluster_corrected_tstat_{task}_{sample_size}_samples_{cdt}_CDT.npy'),
        )
Tasks loop:   0%|          | 0/5 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/1 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/1 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/1 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/1 [00:00<?, ?it/s]
Sample size loop:   0%|          | 0/1 [00:00<?, ?it/s]
CPU times: user 123 ms, sys: 9.23 s, total: 9.35 s
Wall time: 57.5 s

Sensitivity analyses¶


Script below generates the results of sensitivity analysis reported in the manuscript

In [8]:
import scipy.stats as stats
from scipy.interpolate import CubicSpline
from scipy.interpolate import UnivariateSpline
from statsmodels.stats.power import TTestPower
from matplotlib.patches import Patch

%config InlineBackend.figure_format = 'retina'

plt.rc('figure', dpi=500)

analysis = TTestPower()

fig = plt.figure(figsize=(30, 12),constrained_layout=True)
gs = fig.add_gridspec(3, 5)

sample_size = 40
sample_sizes = [10, 20, 40, 80, 160, 320]

sample_colors = np.array(sns.color_palette("rainbow", len(sample_sizes)))
logp_threshold = -np.log10(0.05)

for ci, task in enumerate(tasks):
    for ri, method in enumerate(['spatial', 'topological', 'difference']):
        ax = fig.add_subplot(gs[ri, ci])
        
        scatterx = ground_truth_effect[task]
        
        si = 2

        t_stats = loaded_maps['uncorrected_tstat'][task][f'N={sample_size}']
        t_stats = t_stats[~np.isnan(t_stats).any(axis=1)]

        if method ==  'difference':
            topological_cluster_logps = loaded_maps['topological_cluster_corrected_tstat'][task][f'N={sample_size}']
            topological_cluster_logps = topological_cluster_logps[~np.isnan(topological_cluster_logps).any(axis=1)]
            topological_positive_effects = np.multiply(np.mean((topological_cluster_logps>logp_threshold) & (t_stats>0), 0), (ground_truth_effect[task]>0))
            topological_negative_effects = np.multiply(np.mean((topological_cluster_logps>logp_threshold) & (t_stats<0), 0), (ground_truth_effect[task]<0))

            spatial_cluster_logps = loaded_maps['spatial_cluster_corrected_tstat'][task][f'N={sample_size}']
            spatial_cluster_logps = spatial_cluster_logps[~np.isnan(spatial_cluster_logps).any(axis=1)]
            spatial_positive_effects = np.multiply(np.mean((spatial_cluster_logps>logp_threshold) & (t_stats>0), 0), (ground_truth_effect[task]>0))
            spatial_negative_effects = np.multiply(np.mean((spatial_cluster_logps>logp_threshold) & (t_stats<0), 0), (ground_truth_effect[task]<0))

            topological_scattery = (topological_positive_effects + topological_negative_effects)
            spatial_scattery = (spatial_positive_effects + spatial_negative_effects)
            scattery = topological_scattery - spatial_scattery

            xlim = (-1.5,1.5)

            # cubic spline fit
            bins = np.linspace(max(xlim[0], scatterx.min()), min(xlim[1], scatterx.max()), 31)
            digitized = np.digitize(scatterx, bins)
            x_means = [scatterx[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
            x_centers = bins[1:-1]
            y_means = [scattery[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
            y_sems = [stats.sem(scattery[(digitized == i) | (digitized == i + 1)]) for i in range(1, len(bins) - 1)]

            cs = CubicSpline(x_means, y_means, bc_type='natural', extrapolate=False)
            cs_sem = CubicSpline(x_means, y_sems, bc_type='natural', extrapolate=False)

            sample_x = np.linspace(scatterx.min(),scatterx.max(),200)
            sample_y = cs(sample_x)
            sample_y_sem = cs_sem(sample_x)

            sns.lineplot(
                x=sample_x,
                y=sample_y,
                style=True,
                dashes=[(1,3)],
                color=(0.01,0.05,0.07,1),
                legend=False,
                linewidth=3,
            )

            ax.fill_between(
                sample_x,
                sample_y - (sample_y_sem*1.96),
                sample_y + (sample_y_sem*1.96),
                color = np.append(sample_colors[si], 0.3),
            )

            ax.errorbar(
                x_means,
                y_means,
                xerr=(np.array(x_means) * 0),
                yerr=(np.array(y_sems)*1.96),
                color=np.append(sample_colors[si]/2, 1),
                fmt='.',
                linewidth=0,
                elinewidth=2,
                ms=2,
            )

            ax.axhline(y=np.nanmax(sample_y), xmin=xlim[0], xmax=xlim[1], dashes=(2,2), color=(0.9,0.5,0.5,1), linewidth=3,)
            
            if (np.nanmax(sample_y) < 0.06):
                ax.text(xlim[1]-0.45, float(np.nanmax(sample_y)) - 0.005, '\\textbf{{ {:.0f}\% }}'.format(100*float(np.nanmax(sample_y))), fontsize=20)
            else:
                ax.text(xlim[1]-0.45, float(np.nanmax(sample_y)) - 0.01, '\\textbf{{ {:.0f}\% }}'.format(100*float(np.nanmax(sample_y))), fontsize=20)

            ax.vlines(
                x=-0.5,
                ymin=-0,
                ymax=np.nanmax(sample_y),
                linestyles='dashed',
                colors=[(0.9,0.5,0.5,1)],
                linewidth=3,
            )
            ax.vlines(
                x=0.5,
                ymin=-0,
                ymax=np.nanmax(sample_y),
                linestyles='dashed',
                colors=[(0.9,0.5,0.5,1)],
                linewidth=3,
            )

            ax.text(-0.5 + 0.0001, float(cs(-0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(-0.5))), fontsize=20, color=(0.9,0.25,0.25,1))
            ax.text(0.5 + 0.0001, float(cs(0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(0.5))), fontsize=20, color=(0.9,0.25,0.25,1))

            ax.set_xlim(xlim)

            if (np.nanmax(sample_y) < 0.06):
                ax.set_ylim(-0.005, 0.055)
            else:
                ax.set_ylim(-0.005, 0.11)
    
        else:
            cluster_logps = loaded_maps[f'{method}_cluster_corrected_tstat'][task][f'N={sample_size}']
            cluster_logps = cluster_logps[~np.isnan(cluster_logps).any(axis=1)]

            positive_effects = np.multiply(np.mean((cluster_logps>logp_threshold) & (t_stats>0), 0), (ground_truth_effect[task]>0))
            negative_effects = np.multiply(np.mean((cluster_logps>logp_threshold) & (t_stats<0), 0), (ground_truth_effect[task]<0))

            scattery = positive_effects + negative_effects

            sns.scatterplot(
                x=scatterx,
                y=scattery,
                ax = ax,
                s=10,
                legend=False,
                # color=(0.1,0.5,0.7,0.1),
                color=np.append(sample_colors[si], 0.1),
                linewidth=0,
            )

            effects = np.linspace(-1.5,1.5,200)

            nocorr = [analysis.power(effect_size=x, nobs=sample_size, alpha=0.05) for x in effects]
            boncorr = [analysis.power(effect_size=x, nobs=sample_size, alpha=0.05/Nv) for x in effects]

            sns.lineplot(
                x=effects,
                y=nocorr,
                style=True,
                dashes=[(2,2)],
                color=(.3,.3,.3,1),
                legend=False,
            )
            sns.lineplot(
                x=effects,
                y=boncorr,
                style=True,
                dashes=[(2,2)],
                color=(.3,.3,.3,1),
                legend=False,
            )

            ax.vlines(
                x=-0.5,
                ymin=analysis.power(effect_size=-0.5, nobs=40, alpha=0.05/Nv),
                ymax=analysis.power(effect_size=-0.5, nobs=40, alpha=0.05),
                linestyles='dashed',
                colors=[(0.9,0.5,0.5,1)],
                linewidth=3,
            )
            ax.vlines(
                x=0.5,
                ymin=analysis.power(effect_size=0.5, nobs=40, alpha=0.05/Nv),
                ymax=analysis.power(effect_size=0.5, nobs=40, alpha=0.05),
                linestyles='dashed',
                colors=[(0.9,0.5,0.5,1)],
                linewidth=3,
            )

            # cubic spline fit
            bins = np.linspace(scatterx.min(), scatterx.max(), 31)
            digitized = np.digitize(scatterx, bins)
            x_means = [scatterx[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
            y_means = [scattery[(digitized == i) | (digitized == i + 1)].mean() for i in range(1, len(bins) - 1)]
            y_sems = [stats.sem(scattery[(digitized == i) | (digitized == i + 1)]) for i in range(1, len(bins) - 1)]

            cs = CubicSpline(x_means, y_means, bc_type='natural', extrapolate=False)
            cs_sem = CubicSpline(x_means, y_sems, bc_type='natural', extrapolate=False)

            sample_x = np.linspace(scatterx.min(),scatterx.max(),200)
            sample_y = cs(sample_x)
            sample_y_sem = cs_sem(sample_x)

            sns.lineplot(
                x=sample_x,
                y=sample_y,
                style=True,
                color=np.append(sample_colors[si]/2, 1),
                legend=False,
                linewidth=3,
            )

            ax.text(-0.5 + 0.0005, float(cs(-0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(-0.5))), fontsize=20, color=(0.9,0.25,0.25,1))
            ax.text(0.5 + 0.0005, float(cs(0.5)), '\\boldmath{{$\\Rightarrow {:.0f}\% $}}'.format(100*float(cs(0.5))), fontsize=20, color=(0.9,0.25,0.25,1))
            ax.text(0. + 0.05, float(cs(0.7)), '\\boldmath{{$\mu = {:.0f}\% $}}'.format(100*float(scattery[np.abs(scatterx)>0.2].mean())), fontsize=20, va='center', ha='center')

            ax.set_xlim(-1.5,1.5)
            ax.set_ylim(-0.05,1.05)

        xlabel = ''
        if ri == 2:
            xlabel = 'Effect size ($d$)'
        ax.set_xlabel(xlabel, fontsize=40)

        ylabel = ''
        if ci == 0:
            ylabel = '{}'.format(task)
        
        ax.set_facecolor(np.array([234,234,242])/255)
        ax.grid(color=(0.99,0.99,0.99,), linewidth=3)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        ax.spines['left'].set_visible(False)
        ax.tick_params(axis='both', colors=(0.5,0.5,0.5), labelcolor=(0,0,0), direction='out')


plt.show()
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
posx and posy should be finite values
In [ ]: